Chapter 9 Joint Species Distribution Modelling - output analysis

rm(list=ls()) #clear environment
load("data/squirrels_data.Rdata")
options(contrasts = c('contr.sum','contr.poly'))

# Select desired support threshold
support_threshold=0.9
negsupport_threshold=1-support_threshold
# Select modelchain of interest
load("hmsc/fit_model.3a_250_10.Rdata")

levels.1a <- c("species","index500","season","logseqdepth","Random: animal")
levels.1b <- c("species","index500","season","logseqdepth","Random: animal", "Random: sampling_site")
levels.2a <- c("species","index500","season","species:index500","logseqdepth","Random: animal")
levels.2b <- c("species","index500","season","species:index500","logseqdepth","Random: animal", "Random: sampling_site")
levels.3a <- c("species","index500","season","species:season","logseqdepth","Random: animal")
levels.3b <- c("species","index500","season","species:season","logseqdepth","Random: animal", "Random: sampling_site")
levels.4a <- c("species","index500","season","species:index500","species:season","logseqdepth","Random: animal")
levels.4b <- c("species","index500","season","logseqdepth","species:index500","species:season","Random: animal", "Random: sampling_site")

9.1 Variance partitioning

# Compute variance partitioning
varpart=computeVariancePartitioning(m)

varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=levels.3a)) %>%
   group_by(variable) %>%
   summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
   tt()
tinytable_sxb6d2paquessga8oifp
variable mean sd
species 27.912981 28.448176
index500 1.272544 2.046256
season 8.273125 5.025935
species:season 8.617260 5.298745
logseqdepth 3.573087 2.297375
Random: animal 50.351004 26.142128
# Basal tree
hmsc_tree <- genome_tree %>%
        keep.tip(., tip=m$spNames) 

# Varpart table
varpart_table <- varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=rev(levels.3a))) %>%
   mutate(genome=factor(genome, levels=rev(hmsc_tree$tip.label)))

# Phylums
phylums <- genome_metadata %>%
    filter(genome %in% hmsc_tree$tip.label) %>%
    arrange(match(genome, hmsc_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)

# Basal ggtree
varpart_tree <- hmsc_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylums, offset=-0.2, width=0.1, colnames=FALSE) +
   scale_fill_manual(values=custom_colors)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()

# Add variance stacked barplot
vertical_tree <-  varpart_tree +
        scale_fill_manual(values=c("#83bb90","#cccccc","#ed8a45","#b2b530","#be3e2b","#12738f","#f6de9c")) + #"#122f3d"
        geom_fruit(
             data=varpart_table,
             geom=geom_bar,
             mapping = aes(x=value, y=genome, fill=variable, group=variable),
             pwidth = 2,
             offset = 0.05,
             width= 1,
             orientation="y",
             stat="identity")+
      labs(fill="Variable")

vertical_tree

9.2 Posterior estimates

# Posterior estimates 

post_estimates <- getPostEstimate(hM=m, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(trend = case_when(
          value >= support_threshold ~ "Positive",
          value <= negsupport_threshold ~ "Negative",
          TRUE ~ "Neutral")) 

post_table <- post_estimates %>%
    mutate(genome=factor(genome, levels=rev(hmsc_tree$tip.label))) %>%
    select(-trend) %>%
    mutate(value = case_when(
          value >= support_threshold ~ "Positive",
          value <= negsupport_threshold ~ "Negative",
          TRUE ~ "Neutral")) %>%
    mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    rename(intercept=2,
            Sc=3,
            index500=4,
            autumn=5,
            winter=6,
            logseqdepth=7,
            "Sc:autumn"=8,
            "Sc:winter"=9) %>%
   column_to_rownames(var="genome") %>%
  select(-intercept) 
  
# Basal ggtree
postestimates_tree <- hmsc_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylums, offset=-0.2, width=0.1, colnames=FALSE) +
      scale_fill_manual(values=custom_colors)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()

# Add posterior significant heatmap

postestimates_tree <- gheatmap(postestimates_tree, post_table, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
        scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
        labs(fill="Trend") 

postestimates_tree +
        vexpand(.30, 1)  # expand top 

9.3 Model fit

#NB: waiting for Antton to cross-validate the model

# MFCV <- evaluateModelFit(hM=m, predY=cv)
# 
# mean(MFCV$R2, na.rm = TRUE)
# 
# genome_fit <- tibble(genome=m$spNames, r2 = MFCV[[2]])
# 
# genome_counts_filt %>% 
# mutate_if(is.numeric, ~ . / sum(.)) %>% 
# left_join(genome_fit, by="genome") %>% 
# filter(r2>0.5) %>% 
# select(-c(genome,r2)) %>% 
# colSums() %>%  
# hist()
# 
# var_pred_table <- tibble(mag=m$spNames,
#        pred=MFCV$R2,
#        var_pred=MFCV$R2 * varpart$vals[1,],
#        support=getPostEstimate(hM=m, parName="Beta")$support %>% .[2,],
#        estimate=getPostEstimate(hM=m, parName="Beta")$mean %>% .[2,]) %>%
#   mutate(enrichment=ifelse(support>=support_threshold,"Feral","Neutral")) %>%
#   mutate(enrichment=ifelse(support<=negsupport_threshold,"Domestic",enrichment))
# 
# var_pred_table %>%
#   ggplot(aes(x=estimate,y=var_pred, color=enrichment))+
#     geom_point()+
#     scale_color_manual(values=c("#bd70ae","#949293","#ebe8e8"))+
#     geom_hline(yintercept=0.005, linetype="dashed")+
#     theme_minimal()
# 
# predictive_mags <- var_pred_table %>% 
#   filter(var_pred>=0.005) %>% 
#   pull(mag)

9.4 Correlations among bacteria

#Compute the residual correlation matrix
OmegaCor = computeAssociations(m)

#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
          + (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean

matrix <- toPlot %>% 
      as.data.frame() %>%
      rownames_to_column(var="genome1") %>%
      pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
      mutate(genome1= factor(genome1, levels=rev(hmsc_tree$tip.label))) %>%
      mutate(genome2= factor(genome2, levels=hmsc_tree$tip.label)) %>%
      ggplot(aes(x = genome1, y = genome2, fill = cor)) +
            geom_tile() + 
            scale_fill_gradient2(low = "#be3e2b",
                       mid = "#f4f4f4",
                       high = "#b2b530")+
            theme_void() +
            theme(legend.position = "none") 

corr.legend <- get_legend(matrix, position="right") 
corr.legend <- as_ggplot(corr.legend)

vtree <- hmsc_tree %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(., expand=1.5) +
  hexpand(0.5) 
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
vtree <- gheatmap(vtree, phylums, offset=-0.1, width=0.6, colnames=FALSE) +
      scale_fill_manual(values=custom_colors) +
      theme(legend.position = 'none') + scale_y_reverse()

vtreeD <- hmsc_tree %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(., expand=1.5, layout="dendrogram") 
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
vtreeD <- gheatmap(vtreeD, phylums, offset=-0.1, width=0.6, colnames=FALSE) +
      scale_fill_manual(values=custom_colors) +
      theme(legend.position = 'none') 

# properly align trees to corr matrix with package aplot
matrix %>%  insert_left(vtree, width=.25) %>% insert_top(vtreeD, height=.3) %>% insert_right(corr.legend, width=0.15)

9.5 Association with host species

estimate_sp <- getPostEstimate(hM=m, parName="Beta")$mean %>%
    as.data.frame() %>%
    mutate(variable=m$covNames) %>%
    filter(variable=="speciesSciurus carolinensis") %>%
    pivot_longer(!variable, names_to = "genome", values_to = "mean") %>%
    dplyr::select(genome,mean)

support_sp <- getPostEstimate(hM=m, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m$covNames) %>%
    filter(variable=="speciesSciurus carolinensis") %>%
    pivot_longer(!variable, names_to = "genome", values_to = "support") %>%
    dplyr::select(genome,support)

inner_join(estimate_sp,support_sp,by=join_by(genome==genome)) %>%
    mutate(significance=ifelse(support >= 0.9 | support <= 0.1,1,0)) %>%
    mutate(support=ifelse(mean<0,1-support,support)) %>%
    left_join(genome_metadata, by = join_by(genome == genome)) %>%
    mutate(phylum = ifelse(support > 0.9, phylum, NA)) %>%
    ggplot(aes(x=mean,y=support,color=phylum))+
      geom_point(alpha=0.7, shape=16, size=3)+
      scale_color_manual(values = alpha(custom_colors, 0.7)) +
      geom_vline(xintercept = 0) +
      xlim(c(-2,2)) +
      labs(x = "Beta regression coefficient", y = "Posterior probability") +
      theme_minimal()

9.5.1 MAGs associated with S. carolinensis

post_estimates %>%
    filter(variable=="speciesSciurus carolinensis", trend=="Positive") %>%
    arrange(-value) %>%
    left_join(genome_metadata,by=join_by(genome==genome)) %>%
    dplyr::select(genome,phylum,class,order,family,species,value) %>%
    arrange(phylum, class, family, species)%>%
  paged_table()

9.5.2 MAGs associated with S. vulgaris

post_estimates %>%
    filter(variable=="speciesSciurus carolinensis", trend=="Negative") %>%
    arrange(-value) %>%
    left_join(genome_metadata,by=join_by(genome==genome)) %>%
    dplyr::select(genome,phylum,class,order,family,species,value) %>%
    arrange(phylum, class, family, species)%>%
  paged_table()

9.6 Microbiome composition predictions

9.6.1 Predicted composition by species

# Species prediction in spring-summer
pred_sp_spring <- constructGradient(m, 
                      focalVariable = "species", 
                      non.focalVariables = list(logseqdepth=list(1),index500=list(1), season=list(3, "spring-summer")), 
                      ngrid=gradientlength) %>%
             predict(m, Gradient = ., expected = TRUE) %>%
             as.data.frame() %>%
             mutate(species=c("Sciurus vulgaris","Sciurus carolinensis")) %>%
             pivot_longer(!species, names_to = "genome", values_to = "value") %>%
             mutate(genome = sub("(.*\\..*\\.)[^.]+.*", "\\1", genome)) %>% #remove iteration suffix
             mutate(genome =  sub("\\.$", "", genome))

# levels(m$XData$species)
# levels(m$XData$season)

# Species predictions in autumn
pred_sp_aut <- constructGradient(m, 
                      focalVariable = "species", 
                      non.focalVariables = list(logseqdepth=list(1),index500=list(1), season=list(3, "autumn")), 
                      ngrid=gradientlength) %>%
             predict(m, Gradient = ., expected = TRUE) %>%
             as.data.frame() %>%
             mutate(species=c("Sciurus vulgaris","Sciurus carolinensis")) %>%
             pivot_longer(!species, names_to = "genome", values_to = "value") %>%
             mutate(genome = sub("(.*\\..*\\.)[^.]+.*", "\\1", genome)) %>% #remove iteration suffix
             mutate(genome =  sub("\\.$", "", genome))

# Species predictions in winter
pred_sp_win <- constructGradient(m, 
                      focalVariable = "species", 
                      non.focalVariables = list(logseqdepth=list(1),index500=list(1), season=list(3, "winter")), 
                      ngrid=gradientlength) %>%
             predict(m, Gradient = ., expected = TRUE) %>%
             as.data.frame() %>%
             mutate(species=c("Sciurus vulgaris","Sciurus carolinensis")) %>%
             pivot_longer(!species, names_to = "genome", values_to = "value") %>%
             mutate(genome = sub("(.*\\..*\\.)[^.]+.*", "\\1", genome)) %>% #remove iteration suffix
             mutate(genome =  sub("\\.$", "", genome))

# predictions plots
plot_sp_spring <- pred_sp_spring %>%
      pivot_wider(names_from = species, values_from = value) %>%
      unnest(c(`Sciurus carolinensis`, `Sciurus vulgaris`)) %>%
      mutate(diff.grey_red = `Sciurus carolinensis` - `Sciurus vulgaris`) %>%
      select(genome, diff.grey_red) %>%
      left_join(genome_metadata, by=join_by(genome==genome)) %>%
      mutate(genome= factor(genome, levels=hmsc_tree$tip.label)) %>%
      ggplot(., aes(y=genome, x=diff.grey_red, fill=phylum, color=phylum)) +
          scale_color_manual(values=custom_colors)+
          geom_vline(xintercept = 0)+
          scale_fill_manual(values=alpha(custom_colors,0.3))+
          geom_boxplot(outlier.shape = NA) +
          theme_classic() +
          theme(axis.text.y = element_blank())
plot_sp_aut <- pred_sp_aut %>%
      pivot_wider(names_from = species, values_from = value) %>%
      unnest(c(`Sciurus carolinensis`, `Sciurus vulgaris`)) %>%
      mutate(diff.grey_red = `Sciurus carolinensis` - `Sciurus vulgaris`) %>%
      select(genome, diff.grey_red) %>%
      left_join(genome_metadata, by=join_by(genome==genome)) %>%
      mutate(genome= factor(genome, levels=hmsc_tree$tip.label)) %>%
      ggplot(., aes(y=genome, x=diff.grey_red, fill=phylum, color=phylum)) +
          scale_color_manual(values=custom_colors)+
          geom_vline(xintercept = 0)+
          scale_fill_manual(values=alpha(custom_colors,0.3))+
          geom_boxplot(outlier.shape = NA) +
          theme_classic() +
          theme(legend.position = "none",,
                axis.text.y = element_blank(),
                axis.title.y = element_blank())
plot_sp_win <- pred_sp_win %>%
      pivot_wider(names_from = species, values_from = value) %>%
      unnest(c(`Sciurus carolinensis`, `Sciurus vulgaris`)) %>%
      mutate(diff.grey_red = `Sciurus carolinensis` - `Sciurus vulgaris`) %>%
      select(genome, diff.grey_red) %>%
      left_join(genome_metadata, by=join_by(genome==genome)) %>%
      mutate(genome= factor(genome, levels=hmsc_tree$tip.label)) %>%
      ggplot(., aes(y=genome, x=diff.grey_red, fill=phylum, color=phylum)) +
          scale_color_manual(values=custom_colors)+
          geom_vline(xintercept = 0)+
          scale_fill_manual(values=alpha(custom_colors,0.3))+
          geom_boxplot(outlier.shape = NA) +
          theme_classic() +
          theme(legend.position = "none",
                axis.text.y = element_blank(),
                axis.title.y = element_blank())

legend <- get_legend(plot_sp_spring)


ggarrange(plot_sp_spring, plot_sp_aut, plot_sp_win, #+ rremove("x.text"), 
          legend.grob = legend, legend="right", common.legend = TRUE,
          #labels = c("A", "B", "C"),
          ncol = 3, nrow = 1)

pred_sp_spring %>%
      left_join(genome_metadata, by=join_by(genome==genome)) %>%
      mutate(genome= factor(genome, levels=hmsc_tree$tip.label)) %>%
      ggplot(aes(x=value, y=phylum, group=species.x, fill=species.x, color=species.x)) +
      geom_boxplot() +
      scale_color_manual(name="species",
          breaks=c("Sciurus carolinensis","Sciurus vulgaris"),
          labels=c("Sciurus carolinensis","Sciurus vulgaris"),
          values=c("#999999", "#cc3333")) +
      scale_fill_manual(name="species",
          breaks=c("Sciurus carolinensis","Sciurus vulgaris"),
          labels=c("Sciurus carolinensis","Sciurus vulgaris"),
          values=c("#bfbfbf", "#db7070")) +
      facet_grid(phylum ~ ., space="free", scales="free") +
              theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                    axis.text.y = element_blank(),
                    strip.text.y = element_text(angle = 0)) + 
      labs(y="Phylum",x="log abundance")

pred_sp_aut %>%
      left_join(genome_metadata, by=join_by(genome==genome)) %>%
      mutate(genome= factor(genome, levels=hmsc_tree$tip.label)) %>%
      ggplot(aes(x=value, y=phylum, group=species.x, fill=species.x, color=species.x)) +
      geom_boxplot() +
      scale_color_manual(name="species",
          breaks=c("Sciurus carolinensis","Sciurus vulgaris"),
          labels=c("Sciurus carolinensis","Sciurus vulgaris"),
          values=c("#999999", "#cc3333")) +
      scale_fill_manual(name="species",
          breaks=c("Sciurus carolinensis","Sciurus vulgaris"),
          labels=c("Sciurus carolinensis","Sciurus vulgaris"),
          values=c("#bfbfbf", "#db7070")) +
      facet_grid(phylum ~ ., space="free", scales="free") +
              theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                    axis.text.y = element_blank(),
                    strip.text.y = element_text(angle = 0)) + 
      labs(y="Phylum",x="log abundance")

pred_sp_win %>%
      left_join(genome_metadata, by=join_by(genome==genome)) %>%
      mutate(genome= factor(genome, levels=hmsc_tree$tip.label)) %>%
      ggplot(aes(x=value, y=phylum, group=species.x, fill=species.x, color=species.x)) +
      geom_boxplot() +
      scale_color_manual(name="species",
          breaks=c("Sciurus carolinensis","Sciurus vulgaris"),
          labels=c("Sciurus carolinensis","Sciurus vulgaris"),
          values=c("#999999", "#cc3333")) +
      scale_fill_manual(name="species",
          breaks=c("Sciurus carolinensis","Sciurus vulgaris"),
          labels=c("Sciurus carolinensis","Sciurus vulgaris"),
          values=c("#bfbfbf", "#db7070")) +
      facet_grid(phylum ~ ., space="free", scales="free") +
              theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
                    axis.text.y = element_blank(),
                    strip.text.y = element_text(angle = 0)) + 
      labs(y="Phylum",x="log abundance")

Support values of the host species variable per genome are interpreted as enrichment for either red (S. vulgaris) or grey squirrels (S. carolinensis)

species_enrichment <- post_estimates %>%
    filter(variable=="speciesSciurus carolinensis") %>%
    mutate(enrichment = case_when(
          value >= support_threshold ~ "Sciurus carolinensis",
          value <= negsupport_threshold ~ "Sciurus vulgaris",
          TRUE ~ "Neutral")) %>%
    select(genome, enrichment)

The most likely microbiome compositions of red (S. vulgaris) and grey squirrels (S. carolinensis) as predicted by the hmsc model

gradient = c("Sciurus vulgaris","Sciurus carolinensis")
gradientlength = length(gradient)

pred_sp <- constructGradient(m, 
                      focalVariable = "species", 
                      non.focalVariables = 1) %>%
            predict(m, Gradient = ., expected = TRUE) %>%
            do.call(rbind,.) %>%
            as.data.frame() %>%
            mutate(species=rep(gradient,1000)) %>%
            pivot_longer(!species,names_to = "genome", values_to = "value")

pred_sp_prova <- constructGradient(m, 
                      focalVariable = "species", 
                      non.focalVariables = list(logseqdepth=list(1),index500=list(1), season=list(1)), 
                      ngrid=gradientlength) %>%
             predict(m, Gradient = ., expected = TRUE) %>%
             as.data.frame() %>%
             mutate(species=c("Sciurus vulgaris","Sciurus carolinensis")) %>%
             pivot_longer(!species, names_to = "genome", values_to = "value") %>%
             mutate(genome = sub("(.*\\..*\\.)[^.]+.*", "\\1", genome)) %>% #remove iteration suffix
             mutate(genome =  sub("\\.$", "", genome))

pred_sp_spring <- constructGradient(m, 
                      focalVariable = "species", 
                      non.focalVariables = list(logseqdepth=list(1),index500=list(1), season=list(3, "spring-summer")), 
                      ngrid=gradientlength) %>%
             predict(m, Gradient = ., expected = TRUE) %>%
             as.data.frame() %>%
             mutate(species=c("Sciurus vulgaris","Sciurus carolinensis")) %>%
             pivot_longer(!species, names_to = "genome", values_to = "value") %>%
             mutate(genome = sub("(.*\\..*\\.)[^.]+.*", "\\1", genome)) %>% #remove iteration suffix
             mutate(genome =  sub("\\.$", "", genome))

Log-abundance differences between Sciurus vulgaris and S. carolinensis for genera found enriched in either species.

genome_metadata_clean <- genome_metadata %>%  
    mutate(family = coalesce(family, paste('Unclassified', order)),
           genus = coalesce(genus, 
                              if_else(grepl('^Unclassified', family),
                                      family, paste('Unclassified', family))))

pred_sp %>%
  left_join(species_enrichment,by="genome") %>% 
      filter(enrichment != "Neutral") %>% 
      pivot_wider(names_from = species, values_from = value) %>%
      unnest(c(`Sciurus vulgaris`, `Sciurus carolinensis`)) %>%
      mutate(diff_Sv.Sc = `Sciurus vulgaris`-`Sciurus carolinensis`) %>%
      select(genome, diff_Sv.Sc) %>%
      left_join(genome_metadata_clean, by="genome") %>%
      mutate(genome= factor(genome, levels=genome_tree$tip.label)) %>%
      ggplot(., aes(y=forcats::fct_reorder(genus,diff_Sv.Sc), x=diff_Sv.Sc, fill=phylum, color=phylum)) +
            geom_boxplot(outlier.shape = NA) +
            scale_color_manual(values=custom_colors)+
            scale_fill_manual(values=alpha(custom_colors,0.4))+
          geom_vline(xintercept = 0)+
          theme_classic()+
      labs(x = "Red-grey abundance difference", y = "Genera")

9.6.2 Predicted composition by urbanization

gradient = seq(0,1, by=0.1)
gradientlength = length(gradient)

pred_urban <- constructGradient(m,
                      focalVariable = "index500",
                      non.focalVariables = list(logseqdepth=list(1),species=list(1), season=list(1)),
                      ngrid=gradientlength) %>%
                      predict(m, Gradient = ., expected = TRUE) %>%
                      do.call(rbind,.) %>%
                      as.data.frame() %>%
                      mutate(index500=rep(gradient,1000)) %>%
                      pivot_longer(-c(index500), names_to = "genome", values_to = "value")

post_estimates %>%
    filter(variable=="index500") %>%
    select(genome,trend) %>%
    left_join(pred_urban, by=join_by(genome==genome)) %>%
    group_by(genome, trend, index500) %>%
    summarize(value = mean(value, na.rm = TRUE)) %>%
    left_join(genome_metadata, by=join_by(genome == genome)) %>%
    ggplot(aes(x=index500, y=value, group=genome, color=phylum, linetype=trend)) +
        geom_line() +
        scale_linetype_manual(values=c("solid","dashed","solid")) +
        scale_color_manual(values=custom_colors) +
        facet_grid(fct_rev(trend) ~ phylum) +
        labs(y="Genome abundance (log)",x="Urbanization index") +
        theme(legend.position = "none") +
        theme_minimal() +
         theme(legend.position = "none",
               axis.text.x = element_text(angle = 45, hjust = 0.8,),
               axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
               strip.text.x = element_text(angle = 90, hjust = 0,),
               strip.text.y = element_text(face="bold"),
)

9.6.3 Predicted composition by season

season_enrichment <- post_estimates %>%
    filter(variable=="seasonautumn") %>%
    mutate(enrichment = case_when(
          value >= support_threshold ~ "autumn",
          value <= negsupport_threshold ~ "spring-summer",
          TRUE ~ "neutral")) %>%
    select(genome, enrichment)

# m$covNames

The most likely microbiome compositions by season in red (S. vulgaris) and grey squirrels (S. carolinensis) as predicted by the hmsc model

gradient = c("spring-summer","autumn", "winter")
gradientlength = length(gradient)

pred_season_Sc <- constructGradient(m, 
                      focalVariable = "season", 
                      non.focalVariables = list(logseqdepth=list(1),species=list(3, "Sciurus carolinensis"), index500=list(1)),
                      ngrid=gradientlength) %>%
            predict(m, Gradient = ., expected = TRUE) %>%
            do.call(rbind,.) %>%
            as.data.frame() %>%
            mutate(season=rep(gradient,1000)) %>%
            pivot_longer(!season,names_to = "genome", values_to = "value")

pred_season_Sv <- constructGradient(m, 
                      focalVariable = "season", 
                      non.focalVariables = list(logseqdepth=list(1),species=list(3, "Sciurus vulgaris"), index500=list(1)),
                      ngrid=gradientlength) %>%
            predict(m, Gradient = ., expected = TRUE) %>%
            do.call(rbind,.) %>%
            as.data.frame() %>%
            mutate(season=rep(gradient,1000)) %>%
            pivot_longer(!season,names_to = "genome", values_to = "value")

Log-abundance differences between autumn and spring-summer in Sciurus vulgaris and S. carolinensis for genera found enriched in either season.

genome_metadata_clean <- genome_metadata %>%  
    mutate(family = coalesce(family, paste('Unclassified', order)),
           genus = coalesce(genus, 
                              if_else(grepl('^Unclassified', family),
                                      family, paste('Unclassified', family))))

pred_season_Sc %>%
  left_join(season_enrichment,by="genome") %>%
      filter(enrichment != "Neutral") %>%
      pivot_wider(names_from = season, values_from = value) %>%
      unnest(c(`autumn`, `spring-summer`)) %>%
      mutate(diff_Sc = `autumn`-`spring-summer`) %>%
      select(genome, diff_Sc) %>%
      left_join(genome_metadata_clean, by="genome") %>%
      mutate(genome= factor(genome, levels=genome_tree$tip.label)) %>%
      ggplot(., aes(y=forcats::fct_reorder(genus,diff_Sc), x=diff_Sc, fill=phylum, color=phylum)) +
            geom_boxplot(outlier.shape = NA) +
            scale_color_manual(values=custom_colors)+
            scale_fill_manual(values=alpha(custom_colors,0.4))+
          geom_vline(xintercept = 0)+
          theme_classic()+
      labs(x = "Abundance in Sc (autumn-spring)", y = "Genera")

pred_season_Sv %>%
  left_join(season_enrichment,by="genome") %>%
      filter(enrichment != "Neutral") %>%
      pivot_wider(names_from = season, values_from = value) %>%
      unnest(c(`autumn`, `spring-summer`)) %>%
      mutate(diff_Sv = `autumn`-`spring-summer`) %>%
      select(genome, diff_Sv) %>%
      left_join(genome_metadata_clean, by="genome") %>%
      mutate(genome= factor(genome, levels=genome_tree$tip.label)) %>%
      ggplot(., aes(y=forcats::fct_reorder(genus,diff_Sv), x=diff_Sv, fill=phylum, color=phylum)) +
            geom_boxplot(outlier.shape = NA) +
            scale_color_manual(values=custom_colors)+
            scale_fill_manual(values=alpha(custom_colors,0.4))+
          geom_vline(xintercept = 0)+
          theme_classic()+
      labs(x = "Abundance in Sv (autumn-spring)", y = "Genera")

Prova season

pred_season_Sc$season <- factor(pred_season_Sc$season, levels = c("spring-summer", "autumn", "winter"))
pred_season_Sc %>%
  group_by(genome, season) %>%
  summarize(mean = mean(value, na.rm = TRUE)) %>%
  left_join(genome_metadata, by=join_by(genome==genome)) %>%
  ggplot(aes(x=season, y=mean, group=genome, color=phylum)) +
        geom_line(size=.5) +
        #scale_linetype_manual(values=c("solid","dashed","solid")) +
        scale_color_manual(values=custom_colors) +
        facet_wrap( ~ phylum, ncol = 3) +
        labs(y="Genome abundance (log)",x="Season") +
        theme(legend.position = "none") +
        theme_minimal() +
         theme(legend.position = "none",
               axis.text.x = element_text(angle = 45, hjust = 1,),
               axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
               strip.text.x = element_text(angle = 0, hjust = 0.5,),
               strip.text.y = element_text(face="bold"),
)

pred_season_Sv$season <- factor(pred_season_Sv$season, levels = c("spring-summer", "autumn", "winter"))
pred_season_Sv %>%
  group_by(genome, season) %>%
  summarize(mean = mean(value, na.rm = TRUE)) %>%
  left_join(genome_metadata, by=join_by(genome==genome)) %>%
  ggplot(aes(x=season, y=mean, group=genome, color=phylum)) +
        geom_line() +
        #scale_linetype_manual(values=c("solid","dashed","solid")) +
        scale_color_manual(values=custom_colors) +
        facet_wrap( ~ phylum, ncol = 3) +
        labs(y="Genome abundance (log)",x="Season") +
        theme(legend.position = "none") +
        theme_minimal() +
         theme(legend.position = "none",
               axis.text.x = element_text(angle = 45, hjust = 1,),
               axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
               strip.text.x = element_text(angle = 0, hjust = 0.5,),
               strip.text.y = element_text(face="bold"),
)

9.7 Microbiome functional predictions

tss <- function(abund){sweep(abund, 2, colSums(abund), FUN="/")} 

genome_counts <- genome_counts %>%
  column_to_rownames(var="genome")

#Get list of present MAGs
present_MAGs <- genome_counts %>%
  filter(rownames(.) %in% m$spNames) %>%
  rownames()

#Align distillr annotations with present MAGs and remove all-zero and all-one traits
present_MAGs <- present_MAGs[present_MAGs %in% rownames(genome_gifts)]

GIFTs_elements <- genome_gifts[present_MAGs,] %>%
  select_if(~!all(. == 0)) %>%  #remove all-zero modules
  select_if(~!all(. == 1)) #remove all-one modules

#Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements,GIFT_db)

#Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs and get overall metabolic capacity indices per MAG (at the domain level)
GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db) %>% as.data.frame() %>%
  mutate(Overall=rowMeans(select(.,Biosynthesis,Structure,Degradation), na.rm=TRUE))

9.7.1 Genome-specific GIFT profiles

# mag_elements <- GIFTs_elements %>%
#   as_tibble(., rownames = "MAG") %>%
#   reshape2::melt() %>%
#   rename(Code_element = variable, GIFT = value) %>%
#   inner_join(GIFT_db,by="Code_element") %>%
#   # arrange(Code_function) %>%    # First sort by val. This sort the dataframe but NOT the factor levels
#   # mutate(Functions=factor(Function, levels=Function)) %>%
#   ggplot(., aes(y=Code_element, x=MAG, fill=GIFT))+
#     geom_tile()+
#     scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
#     scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
#     scale_fill_gradientn(limits = c(0,1), colours=brewer.pal(7, "YlGnBu"))+
#     #facet_grid(Function ~ ., scales = "free", space = "free")+
#     theme_grey(base_size=8)+
#     theme(axis.text.x = element_blank(),
#           strip.text.y = element_text(angle = 0),
#           axis.text.y = element_blank(),
#           legend.position="none")
# 
# # Generate the phylum color heatmap
# phylum_heatmap <- phylum_colors %>%
#   right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
#   arrange(match(genome, genome_tree$tip.label)) %>%
#   select(genome,phylum) %>%
#   mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
#   column_to_rownames(var = "genome")
# 
# #Generate a basal utrametric tree for the sake of visualisation
# gift_tree <- force.ultrametric(postestimates_tree,method="extend") %>%
#    ggtree(., expand=1, layout="dendrogram")
# 
# #Add phylum colors next to the tree tips
# gift_tree <- gheatmap(gift_tree, phylum_heatmap, offset=0, width=0.2, colnames=FALSE, color = NA) +
#    scale_fill_manual(values=custom_colors) +
#     labs(fill="Phylum") + theme(legend.position="none")
# 
# 
# gift_color <- gift_colors %>% pull(Color)
# names(gift_color) <- gift_colors$Function
# 
# function_heatmap <- GIFTs_elements %>%
#   as_tibble(., rownames = "MAG") %>%
#   reshape2::melt() %>%
#   rename(Code_element = variable, GIFT = value) %>%
#   inner_join(GIFT_db, by = "Code_element") %>%
#   select(Code_element, Function) %>%
#   distinct() %>%
#   ggplot(aes(y = Code_element)) +
#   geom_tile(aes(x = 1, fill = Function, color = Function), width = 0.1, height = 0.9) +
#   geom_text(data = . %>% distinct(Function, .keep_all = TRUE),
#             aes(x = 1.08, label = Function), hjust = 0, vjust = -0.3, size=2.8) +
#   scale_fill_manual(values = gift_color) +
#   scale_color_manual(values = gift_color) +
#   #facet_grid(Function ~ ., scales = "free", space = "free") +
#   theme_void() +
#   theme(
#     axis.title.x = element_blank(),
#     axis.title.y = element_blank(),
#     axis.text.x = element_blank(),
#     axis.ticks.x = element_blank(),
#     axis.text.y = element_blank(),
#     axis.ticks.y = element_blank(),
#     strip.text.y = element_blank(),
#     legend.position="none"
#   ) +
#   xlim(0.95, 1.5)
# 
# 
# 
# mag_elements %>% insert_top(gift_tree, height = .3) %>% insert_right(function_heatmap, width=.3)


#vertical version (y=MAGs, x=element)

mag_elements <- GIFTs_elements %>%
  as_tibble(., rownames = "MAG") %>%
  reshape2::melt() %>%
  rename(Code_element = variable, GIFT = value) %>%
  inner_join(GIFT_db,by="Code_element") %>%
  # arrange(Code_function) %>%    # First sort by val. This sort the dataframe but NOT the factor levels
  # mutate(Functions=factor(Function, levels=Function)) %>%
  ggplot(., aes(x=Code_element, y=MAG, fill=GIFT))+
    geom_tile()+
    scale_y_discrete(guide = guide_axis(check.overlap = TRUE))+
    scale_x_discrete(guide = guide_axis(check.overlap = TRUE))+
    scale_fill_gradientn(limits = c(0,1), colours=brewer.pal(7, "YlGnBu"))+
    #facet_grid(Function ~ ., scales = "free", space = "free")+
    theme_grey(base_size=3)+
    theme(axis.text.x = element_blank(),
          strip.text.y = element_text(angle = 0),
          axis.text.y = element_blank(),
          axis.title.y= element_blank(),
          legend.position="none")
Using MAG as id variables
Warning in inner_join(., GIFT_db, by = "Code_element"): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 2065 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
# Generate the phylum color heatmap
phylum_heatmap <- phylum_colors %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
  arrange(match(genome, hmsc_tree$tip.label)) %>%
  select(genome,phylum) %>%
  mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
  column_to_rownames(var = "genome")

#Generate a basal utrametric tree for the sake of visualisation
gift_tree <- force.ultrametric(hmsc_tree,method="extend") %>%
   ggtree(., expand=1)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
Warning in stat_tree(data = data, mapping = mapping, geom = "segment", position = position, : Ignoring unknown parameters: `expand`
Warning in stat_tree(data = data, mapping = mapping, geom = "segment", position = position, : Ignoring unknown parameters: `expand`
#Add phylum colors next to the tree tips
gift_tree <- gheatmap(gift_tree, phylum_heatmap, offset=0, width=0.3, colnames=FALSE, color = NA) +
   scale_fill_manual(values=custom_colors) +
    labs(fill="Phylum") + theme(legend.position="none")
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
gift_color <- gift_colors %>% pull(Color)
names(gift_color) <- gift_colors$Function

function_heatmap_top <- GIFTs_elements %>%
  as_tibble(., rownames = "MAG") %>%
  reshape2::melt() %>%
  rename(Code_element = variable, GIFT = value) %>%
  inner_join(GIFT_db, by = "Code_element") %>%
  select(Code_element, Function) %>%
  distinct() %>%
  ggplot(aes(x = Code_element)) +  # Use x = Code_element to place tiles along the x-axis
  geom_tile(aes(y = 1, fill = Function, color = Function), width = 0.9, height = 0.08) +  # Adjust width/height
  geom_text(data = . %>% distinct(Function, .keep_all = TRUE),
            aes(y = 1.07, label = Function), vjust = 0.8, hjust = 0, size = 3, angle = 90) +  # Adjust text placement
  scale_fill_manual(values = gift_color) +
  scale_color_manual(values = gift_color) +
  theme_void() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.text.x = element_blank(),
    legend.position = "none",
    plot.margin = margin(10, 100, 10, 10),
    strip.clip='off'
  ) +
  ylim(0.95, 1.5) +
  coord_cartesian(clip = "off")# Adjust y-limits to control label position
Using MAG as id variables
Warning in inner_join(., GIFT_db, by = "Code_element"): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 2065 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
mag_elements %>% insert_left(gift_tree, width = .3) %>% insert_top(function_heatmap_top, height=.3)

9.7.2 Function-level predictions

9.7.2.1 Predicted functions by species

community_func_sp <- pred_sp %>%
  filter(genome %in% present_MAGs) %>% #keep only predictive mags -> NB: I can't get predictive mags rn (see Model fit)
  group_by(species, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    dplyr::select(-row_id) %>%
    column_to_rownames(var = "species") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_functions,.,GIFT_db) %>% 
    as.data.frame() %>%
    rownames_to_column(var="species")
   })

function_pred_sp <- map_dfr(community_func_sp, function(df) {
  Sc_values <- df %>% filter(species == "Sciurus carolinensis") %>% select(-species)
  Sv_values <- df %>% filter(species == "Sciurus vulgaris") %>% select(-species)
  Sc_values - Sv_values
}) %>%
  mutate(iteration=c(1:1000)) %>% 
  pivot_longer(!iteration,names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated with Sc
function_pred_sp %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()
# Negatively associated with Sc
function_pred_sp %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
positive <- function_pred_sp %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  dplyr::select(-negative_support) %>%
  rename(support=positive_support)

negative <- function_pred_sp %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  dplyr::select(-positive_support) %>%
  rename(support=negative_support)


all_functions <- function_pred_sp %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

gift_colors <- gift_colors %>% 
  mutate(legend=str_c(Code_function," - ",Function))  %>% 
  filter(legend %in% all_functions$function_legend)

all_functions %>%
  ggplot(aes(x=mean, y=fct_reorder(function_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.12,0.05)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient (Sv as reference)",y="Functional trait") +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "none")

9.7.2.2 Predicted functions by urbanization

community_func_urb <- pred_urban %>%
  group_by(index500, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "index500") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_functions,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="index500")
   })


calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

function_pred_urb <- map_dfc(community_func_urb, function(mat) {
      mat %>%
        column_to_rownames(var = "index500") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_func_urb[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated with urbanisation
function_pred_urb %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()
#Negatively associated with urbanisation
function_pred_urb %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
#Positively associated
positive <- function_pred_urb%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Negatively associated
negative <- function_pred_urb %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_functions <- function_pred_urb %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

gift_colors <- read_tsv("data/gift_colors.tsv") %>% 
  mutate(legend=str_c(Code_function," - ",Function))  %>% 
  filter(legend %in% all_functions$function_legend)

all_functions %>%
  ggplot(aes(x=mean, y=fct_reorder(function_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.017,0.017)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait") +
      #guides(col = guide_legend(ncol = 1))
      theme(legend.position = "none")

community_func_urb %>%
    bind_rows() %>%
    pivot_longer(-index500, names_to = "trait", values_to = "value") %>%
    filter(trait %in% c(positive$trait, negative$trait)) %>%
    mutate(trait=factor(trait, levels=c(positive$trait, negative$trait))) %>%
    mutate(index500=as.numeric(index500)) %>%
    ggplot(aes(x=index500, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Urbanization",y="Metabolic Capacity Index") 

9.7.2.3 Predicted functions by season

S. carolinensis

S. vulgaris

9.7.3 Element-level predictions

9.7.3.1 Predicted elements by species

community_elem_sp <- pred_sp %>%
  group_by(species, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "species") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_elements,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="species")
   })

element_pred_sp <- map_dfr(community_elem_sp, function(df) {
  Sc_values <- df %>% filter(species == "Sciurus carolinensis") %>% select(-species)
  Sv_values <- df %>% filter(species == "Sciurus vulgaris") %>% select(-species)
  Sc_values - Sv_values
}) %>%
  mutate(iteration=c(1:1000)) %>% 
  pivot_longer(!iteration,names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

# Positively associated
element_pred_sp %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()

# Negatively associated
element_pred_sp %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
positive <- element_pred_sp %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  dplyr::select(-negative_support) %>%
  rename(support=positive_support)

negative <- element_pred_sp %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  dplyr::select(-positive_support) %>%
  rename(support=negative_support)

all_elements <- bind_rows(positive,negative) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

gift_colors <- gift_colors %>% 
  mutate(legend=str_c(Code_function," - ",Function))  %>% 
  filter(legend %in% all_elements$function_legend)

all_elements %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.05,0.05)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait")

9.7.3.2 Predicted elements by urbanization

community_elem_urb <- pred_urban %>%
  group_by(index500, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "index500") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(GIFTs_elements,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="index500")
   })


calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

element_pred_urb <- map_dfc(community_elem_urb, function(mat) {
      mat %>%
        column_to_rownames(var = "index500") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_elem_urb[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated
element_pred_urb %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()
#Negatively associated
element_pred_urb %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()
#Positively associated
positive <- element_pred_urb%>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Negatively associated
negative <- element_pred_urb %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_elements <- bind_rows(positive,negative) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

gift_colors <- gift_colors %>% 
  mutate(legend=str_c(Code_function," - ",Function))  %>% 
  filter(legend %in% all_elements$function_legend)

all_elements %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      #xlim(c(-0.05,0.05)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait")

community_elem_urb %>%
    bind_rows() %>%
    pivot_longer(-index500, names_to = "trait", values_to = "value") %>%
    filter(trait %in% c(positive$trait, negative$trait)) %>%
    mutate(trait=factor(trait, levels=c(positive$trait, negative$trait))) %>%
    mutate(index500=as.numeric(index500)) %>%
    ggplot(aes(x=index500, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=6, scales="free") +
          theme_minimal() +
          labs(x="Urbanization",y="Metabolic Capacity Index")

9.7.3.3 Predicted elements by season

S. carolinensis

S. vulgaris